/***
This do-file creates several data files used in the stimulus analysis.
***/

*-------------------------------------------------------------------------------
* Set up
*-------------------------------------------------------------------------------

* Set $root 
project figstabs, root
if (r(buildrunning)==0) include "${root}/code/config_interactive.do"

* Set globals
project, uses("${root}/code/set_globals.do")
include "${root}/code/set_globals.do"

* Create required subfolders
cap mkdir "${root}/data"
cap mkdir "${root}/data/derived"
cap mkdir "${root}/data/derived/Policy"
cap mkdir "${root}/data/derived/Policy/Treasury Statements"
cap mkdir "${root}/results"
cap mkdir "${root}/results/paper numbers"
cap mkdir "${root}/results/paper numbers/Policy"

cap erase "${root}/results/paper numbers/Policy/Stimulus Distribution Shares.yaml"

*-------------------------------------------------------------------------------
**# 1. Stimulus eligibility rates, by stimulus round and ZIP code
*-------------------------------------------------------------------------------

*--------------------------
* Loop over stimulus rounds 
*--------------------------
foreach round in april january march {

	*--------------------------
	* Load number of people in both types of household
	*--------------------------
	* Import data 
	project, uses("${root}/data/dvc/NHGIS/nhgis0003_ds239_20185_2018_zcta.csv")
	import delimited "${root}/data/dvc/NHGIS/nhgis0003_ds239_20185_2018_zcta.csv", clear 

	* Keep relevant 
	keep zcta5a ajxhe003 ajxhe024
	rename ajxhe003 count_in_family_hh
	rename ajxhe024 count_in_non_family_hh

	* Save as tempfile 
	tempfile counts 
	save `counts'

	*--------------------------
	* Load number of households by income bin x household type 
	*--------------------------
	* Import raw data
	project, uses("${root}/data/dvc/NHGIS/nhgis0009_ds239_20185_2018_zcta.csv")
	import delimited "${root}/data/dvc/NHGIS/nhgis0009_ds239_20185_2018_zcta.csv", clear

	* Only keep vars we'll use
	keep zcta5a ajwme001 ajy9e0* ajzae001 ajxke003 ajxke001 

	* Merge on counts 
	merge 1:1 zcta5a using `counts', assert(3) nogen 

	* Define marriage rate in terms of people, not households 
	replace ajxke003 = count_in_family_hh
	replace ajxke001 = (count_in_family_hh + count_in_non_family_hh) 

	* Create marriage rate 
	g marriage_rate =  ajxke003 / ajxke001 // married households / all households
	drop ajxke003 ajxke001

	* Rename variables 
	ren ajwme001 popsize
	ren ajzae001 med_hhinc

	/*------------------------------------------------------------------
	We regress marriage rate on median household income at the zcta level, weighting by population.
	We then get a predicted marriage rate for each ACS income bin using the mid-point. E.g. for the 75K-100K bin, the predicted marriage rate is between_zip_constant + 87.5K * between_zip_slope.
	
	We then get the eligible household counts as follows;
		Everyone below 75K is eligible
		For the 75K-100K bin, the eligible count is
			weight * (1 - predicted marriage rate) * number of households + predicted marriage rate * number of households
		For the 100-150K bins, the eligible count is predicted marriage rate * number of households
	For the 150-200K bin the eligible count is predicted marriage rate * number of households * weight
	
	See below for definition of "weight"
	
	We then sum the eligible counts across the bins and divide by the total number of households.
	------------------------------------------------------------------*/
	reg marriage_rate med_hhinc [w = popsize]
	scalar pred_marry_13 = _b[_cons] + 87500 * _b[med_hhinc]
	scalar pred_marry_14 = _b[_cons] + 112500 * _b[med_hhinc]
	scalar pred_marry_15 = _b[_cons] + 137500 * _b[med_hhinc]
	scalar pred_marry_16 = _b[_cons] + 175000 * _b[med_hhinc]

	g eligible_best_guess_vary = 0 
	forv i = 2/9 {
		replace eligible_best_guess_vary = eligible_best_guess_vary + ajy9e00`i'
	} 
	forv i = 10/12 {
		replace eligible_best_guess_vary = eligible_best_guess_vary + ajy9e0`i'
	}

	* In the 75-100K bin -- weight varies by round
	* Assuming uniform distribution within bin, assign weight = (threshold for no stim check - 75K) / (99,999 - 75K) * 0.5
	
		// Round 1 ($1200 Checks): singles: begins fading out at 75K, fully faded out by 99K. 
								// couples: always receive full amount. 
		if "`round'" == "april" {
		replace eligible_best_guess_vary = eligible_best_guess_vary + ///
										   (99000 - 75000) / (99999 - 75000) * 0.5 * (1 - pred_marry_13) * ajy9e013 + ///
										   pred_marry_13 * ajy9e013	
		}
		// Round 2 ($600 Checks): singles: begins fading out at 75K, fully faded out by 87K. 
								// couples: always receive full amount. 
		if "`round'" == "january" {
		replace eligible_best_guess_vary = eligible_best_guess_vary + ///
										   (87000 - 75000) / (99999 - 75000) * 0.5 * (1 - pred_marry_13) * ajy9e013 + ///
										   pred_marry_13 * ajy9e013	
		}	
		// Round 3 ($1400 Checks): singles: begins fading out at 75K, fully faded out by 80K.
								// couples: always receive full amount. 
		if "`round'" == "march" {
		replace eligible_best_guess_vary = eligible_best_guess_vary + ///
										   (80000 - 75000) / (99999 - 75000) * 0.5 * (1 - pred_marry_13) * ajy9e013 + ///
										   pred_marry_13 * ajy9e013	
		}	
	
	* In the 100-150K bin -- weight is 0 for singles and 1 for couples across rounds. 
	replace eligible_best_guess_vary = eligible_best_guess_vary + ///
									   pred_marry_14 * ajy9e014
	replace eligible_best_guess_vary = eligible_best_guess_vary + ///
									   pred_marry_15 * ajy9e015
									   
	* In the 150-200K bin -- weight varies by round. 
	* Assuming uniform distribution within bin, assign weight = (threshold for no stim check - 150K) / (199,999 - 150K)
	
		// Zero weight for singles. 
		// For couples: Round 1 ($1200 Checks): begins fading out at 150K, fully faded out by 198K.
		if "`round'" == "april" {
			replace eligible_best_guess_vary = eligible_best_guess_vary + ///
										   (198000 - 150000) / (199999 - 150000) * 0.5 * pred_marry_16 * ajy9e016
		}
		// Round 2 ($600 Checks): begins fading out at 150K, fully faded out by 174K.
		if "`round'" == "january" {
			replace eligible_best_guess_vary = eligible_best_guess_vary + ///
										   (174000 - 150000) / (199999 - 150000) * 0.5 * pred_marry_16 * ajy9e016
		}	
		// Round 3 ($1400 Checks): begins fading out at 150K, fully faded out by 160K.
		if "`round'" == "march" {
			replace eligible_best_guess_vary = eligible_best_guess_vary + ///
										   (160000 - 150000) / (199999 - 150000) * 0.5 * pred_marry_16 * ajy9e016
		}	
									  
	replace eligible_best_guess_vary = eligible_best_guess_vary / ajy9e001

	* Keep relevant 
	keep zcta5a eligible_best_guess_vary

	* Output  
	gisid zcta5a 
	rename zcta5a zcta 
	keep zcta eligible_best_guess_vary 
	rename eligible_best_guess_vary eligibility_rate  
	
	gen round = "`round'"
	if "`round'" == "april" {
		tempfile eligibility_data 
		save `eligibility_data', replace
	}
	else {
		append using `eligibility_data'
		save `eligibility_data', replace
	}
}

use `eligibility_data', clear 
gisid zcta round 
order round zcta eligibility_rate
sort round zcta
save "${root}/data/derived/Policy/stimulus eligibility by ZCTA.dta", replace
project, creates("${root}/data/derived/Policy/stimulus eligibility by ZCTA.dta")

*-------------------------------------------------------------------------------
**# 2. Share of EIP distributed by a given date, by stimulus round 
*-------------------------------------------------------------------------------

* ------------------------------
* Analyze daily Treasury statements 
* ------------------------------

* Loop over rounds 
foreach round in april january march {

	* ------------------------------
	* Set total amount allocated  
	* ------------------------------
	if "`round'" == "april" local allocated 271.4     // billion; source https://www.irs.gov/pub/irs-pdf/p55b.pdf
	if "`round'" == "january" local allocated 141.5   // billion; source https://www.irs.gov/pub/irs-pdf/p55b.pdf
	if "`round'" == "march" local allocated 401.5     // billion; source https://www.irs.gov/pub/irs-pdf/p55b.pdf
	
	* ------------------------------
	* Download Treasury files 
	* ------------------------------

	* Get files relevant to April stimulus 
	if "`round'" == "april" {
	
		forvalues i = 1/31 {
	
			* Set URL name 
			if `i' < 9 local url "https://fsapps.fiscal.treasury.gov/dts/files/20040`i'00.xlsx"
			if `i' >= 10 local url "https://fsapps.fiscal.treasury.gov/dts/files/2004`i'00.xlsx"
			
			* Copy to Treasury folder - capture exception when no treasury statement on a given date
			cap {
				copy `url' "${root}/data/derived/Policy/Treasury Statements/Statement April 2020 `i'.xlsx", replace	
				project, creates("${root}/data/derived/Policy/Treasury Statements/Statement April 2020 `i'.xlsx")
			}
		}
		
		forvalues i = 1/31 {
	
			* Set URL name 
			if `i' < 9 local url "https://fsapps.fiscal.treasury.gov/dts/files/20050`i'00.xlsx"
			if `i' >= 10 local url "https://fsapps.fiscal.treasury.gov/dts/files/2005`i'00.xlsx"
			
			* Copy to Treasury folder - capture exception when no treasury statement on a given date
			cap {
				copy `url' "${root}/data/derived/Policy/Treasury Statements/Statement May 2020 `i'.xlsx", replace	
				project, creates("${root}/data/derived/Policy/Treasury Statements/Statement May 2020 `i'.xlsx")
			}
		}
	}

	* Get files relevant to January stimulus 
	if "`round'" == "january" {
	
		* Get December files 
		forvalues i = 1/31 {

			* Set URL name 
			if `i' < 9 local url "https://fsapps.fiscal.treasury.gov/dts/files/20120`i'00.xlsx"
			if `i' >= 10 local url "https://fsapps.fiscal.treasury.gov/dts/files/2012`i'00.xlsx"
			
			* Copy to Treasury folder - capture exception when no treasury statement on a given date
			cap {
				copy `url' "${root}/data/derived/Policy/Treasury Statements/Statement December `i'.xlsx", replace
				project, creates("${root}/data/derived/Policy/Treasury Statements/Statement December `i'.xlsx")
			}
		}

		* Get January files 
		forvalues i = 4/15 {

			* Set URL name 
			if `i' < 9 local url "https://fsapps.fiscal.treasury.gov/dts/files/21010`i'00.xlsx"
			if `i' >= 10 local url "https://fsapps.fiscal.treasury.gov/dts/files/2101`i'00.xlsx"
			
			* Copy to Treasury folder - capture exception when no treasury statement on a given date
			cap {
				copy `url' "${root}/data/derived/Policy/Treasury Statements/Statement January 2021 `i'.xlsx", replace
				project, creates("${root}/data/derived/Policy/Treasury Statements/Statement January 2021 `i'.xlsx")
			}
		}
	}

	* Get files relevant to March stimulus
	if "`round'" == "march" {
	
		forvalues i = 1/31 {
	
			* Set URL name 
			if `i' < 9 local url "https://fsapps.fiscal.treasury.gov/dts/files/21030`i'00.xlsx"
			if `i' >= 10 local url "https://fsapps.fiscal.treasury.gov/dts/files/2103`i'00.xlsx"
			
			* Copy to Treasury folder - capture exception when no treasury statement on a given date
			cap {
				copy `url' "${root}/data/derived/Policy/Treasury Statements/Statement March 2021 `i'.xlsx", replace	
				project, creates("${root}/data/derived/Policy/Treasury Statements/Statement March 2021 `i'.xlsx")
			}
		}
	}	
	
	* Create file of amount deposited on each day 
	local files : dir "${root}/data/derived/Policy/Treasury Statements/" files "Statement *.xlsx", respectcase
	local i = 0 
	foreach file in `files' {

		* Display file name 
		di "`file'"
		
		* Import file 
		project, uses("${root}/data/derived/Policy/Treasury Statements/`file'")
		import excel using "${root}/data/derived/Policy/Treasury Statements/`file'", clear 
		
		* Keep relevant 
		keep B D 
		rename B type 
		rename D spending_daily 
		
		* Keep EIP 
		keep if strpos(type, "IRS") > 0 
		drop if mi(type) 
		
		* Save as tempfile 
		gen date = "`file'"
		local i = `i' + 1 
		if `i' > 1 append using `eip_payments'
		tempfile eip_payments 
		save `eip_payments', emptyok replace
	}

	* Remove duplicates 
	duplicates drop  
	isid type date 

	* Reformat dates 
	rename date string_date 
	replace string_date = subinstr(string_date, "Statement ", "", 1)
	replace string_date = subinstr(string_date, ".xlsx", "", 1) 
	gen month = 12 if strpos(string_date, "December") > 0
	replace month = 1 if strpos(string_date, "January") > 0
	replace month = 3 if strpos(string_date, "March") > 0
	replace month = 4 if strpos(string_date, "April") > 0
	replace month = 5 if strpos(string_date, "May") > 0
	assert !mi(month)
	gen day = substr(string_date, -2, 2)
	replace day = subinstr(day, " ", "", 1)  
	destring day, replace
	assert !mi(day) 
	gen date = mdy(month, day, 2020) if month == 12 | month == 4 | month == 5 
	replace date = mdy(month, day, 2021) if month == 1 | month == 3 
	format date %td 

	* Destring spending 
	destring spending_daily, replace 
	
	* Collapse to categories 
	drop if type == "Estate and Gift Taxes & Misc IRS Rcpts."
	if "`round'" == "april" gen category = "EIP" if inlist(type, "IRS Tax Refunds Individual (EFT)", "IRS Tax Refunds Business (Checks)")
	if "`round'" == "january" gen category = "EIP" if inlist(type, "IRS - Economic Impact Payments (Checks)", "IRS - Economic Impact Payments (EFT)")
	if "`round'" == "march" gen category = "EIP" if inlist(type,  "IRS - Economic Impact Payments (Checks)", "IRS - Economic Impact Payments (EFT)")
	replace category = "Other IRS" if category != "EIP" 

	collapse (sum) spending_daily, by(date category) 
	replace spending_daily = spending_daily / 1000 

	* Calculate share that has been distributed 
	if "`round'" == "april" {
		sum spending_daily if category == "EIP" & date <= mdy(4, 21, 2020) 
		assert r(N) == 15
	}
	if "`round'" == "january" {
		sum spending_daily if category == "EIP" & inrange(date, mdy(1, 1, 2021), mdy(1, 10, 2021)) 
		assert r(N) == 6
	}
	if "`round'" == "march" {
		sum spending_daily if category == "EIP" & inrange(date, mdy(3, 1, 2021), mdy(3, 23, 2021)) 
		assert r(N) == 17
	}
	
	local tot_distributed = `r(sum)' 
	di "Amount distributed is " `tot_distributed' "billion"
	local share_distributed = `tot_distributed' / `allocated'
	di "Share distributed is " 100 * `share_distributed' "%"
	
	* Scalars for paper 
	if "`round'" == "april" {
		sum spending_daily if category == "EIP" & inrange(date, mdy(4, 1, 2020), mdy(4, 30, 2020))
		local stim1_apr = r(sum)
		sum spending_daily if category == "EIP" & date == mdy(4, 15, 2020)
		local stim1_apr15 = r(sum)
		
		local stim1_apr15_share = `stim1_apr15' / `stim1_apr' * 100
		
		yamlout using "${root}/results/paper numbers/Policy/Stimulus Distribution Shares.yaml", ///
			key("stim1_apr15_share") ///
			comment("Percent of Stim 1 EIP payments in April 2020 that were made on April 15") ///
			value(`stim1_apr15_share') fmt(%2.0f)
			
			
		sum spending_daily if category == "EIP" & inrange(date, mdy(4, 1, 2020), mdy(4, 21, 2020)) 
		local stim1_apr_pre = r(sum)
		
		yamlout using "${root}/results/paper numbers/Policy/Stimulus Distribution Shares.yaml", ///
			key("stim1_apr_pre") ///
			comment("Stim 1 EIP payments made before Apr 21 (billions)") ///
			value(`stim1_apr_pre') fmt(%3.0f)				
	}
	
	if "`round'" == "january" {
		sum spending_daily if category == "EIP" & inrange(date, mdy(1, 1, 2021), mdy(1, 10, 2021)) 
		local stim1_jan_pre = r(sum)
		
		yamlout using "${root}/results/paper numbers/Policy/Stimulus Distribution Shares.yaml", ///
			key("stim1_jan_pre") ///
			comment("Stim 2 payments made before Jan 10 (billions)") ///
			value(`stim1_jan_pre') fmt(%3.0f)	
	}
	
	* Output 
	gen share_distributed = `share_distributed'
	keep if _n == 1 
	keep share_distributed
	
	gen round = "`round'"
	if "`round'" == "april" gen distribution_cutoff_date = "April 21"
	if "`round'" == "january" gen distribution_cutoff_date = "January 10"
	if "`round'" == "march" gen distribution_cutoff_date = "March 23"
	
	if "`round'" == "april" {
		tempfile distribution_data 
		save `distribution_data', replace
	}
	else {
		append using `distribution_data'
		save `distribution_data', replace
	}
	
}

use `distribution_data', clear 
gisid round 
order round distribution_cutoff_date share_distributed
sort round
save "${root}/data/derived/Policy/share of EIP that has been distributed.dta", replace
project, creates("${root}/data/derived/Policy/share of EIP that has been distributed.dta")


* Scalars for paper 
foreach round in april january march {
	sum share_distributed if round == "`round'"
	local share_distributed_`round' = r(mean) * 100
		
	yamlout using "${root}/results/paper numbers/Policy/Stimulus Distribution Shares.yaml", ///
		key("share_distributed_`round'") ///
		comment("Percent of `round' EIP payments made by first week of treatment window") ///
		value(`share_distributed_`round'') fmt(%2.0f)
}

project, creates("${root}/results/paper numbers/Policy/Stimulus Distribution Shares.yaml")

*-------------------------------------------------------------------------------
**# 3. Estimate of daily spending in January 2019 by income quartile 
*-------------------------------------------------------------------------------

*---------------------------
* Create January 2019 spending in each income quartile   
*---------------------------
*---------------------------
* NIPA 2.3.5M
*---------------------------

* Load NIPA Table 2.3.5 Personal Consumption Expenditures by Major Type of Product 
project, uses("${root}/data/derived/NIPA/NIPA Table 2.3.5M Personal Consumption Expenditures by Major Type of Product.dta")
use "${root}/data/derived/NIPA/NIPA Table 2.3.5M Personal Consumption Expenditures by Major Type of Product.dta", clear

* Restrict to spending categories that appear on credit cards
gen in_affinity = 0
replace in_affinity = 1 if serieslabel == "Furnishings and durable household equipment"
replace in_affinity = 1 if serieslabel == "Recreational goods and vehicles"   
replace in_affinity = 1 if serieslabel == "Other durable goods"
replace in_affinity = 1 if serieslabel == "Food and beverages purchased for off-premises consumption"
replace in_affinity = 1 if serieslabel == "Clothing and footwear"
replace in_affinity = 1 if serieslabel == "Gasoline and other energy goods"
replace in_affinity = 1 if serieslabel == "Other nondurable goods"
replace in_affinity = 1 if serieslabel == "Transportation services"
replace in_affinity = 1 if serieslabel == "Recreation services"
replace in_affinity = 1 if serieslabel == "Food services and accommodations"
replace in_affinity = 1 if serieslabel == "Financial services and insurance"
replace in_affinity = 1 if serieslabel == "Other services"

keep if in_affinity == 1
gunique serieslabel
assert r(unique) == 12 

* Collapse all categories
gisid seriescode year month
gcollapse (sum) value, by(year month)
label var value "Nominal Annualized Card Spending, in Millions"

* Output
sort year month
gisid year month

* Load annualized credit card spending in January 2019
keep if year == 2019 & month == 1
assert _N == 1

* Calculate daily credit card spending in January 2019
sum value 
local jan_val_2019 = `r(mean)' / 365 * 1E6  								    // converting annualized spending in millions to daily spending in dollars

*---------------------------
* Calculate January Affinity value  
*---------------------------
* Load data 
project, uses("${root}/data/web/data/Affinity Income Shares - National - 2019.csv")
import delimited "${root}/data/web/data/Affinity Income Shares - National - 2019.csv", clear

keep income_quartile share_jan2019
rename income_quartile income_q
gisid income_q 

gen y19_spending_snorm = `jan_val_2019' * share_jan2019                         // overall spending level in Jan 2019 * share of spending in Jan 2019 for each income quartile

* Output 
save "${root}/data/derived/Policy/daily spending by income quartile.dta", replace 
project, creates("${root}/data/derived/Policy/daily spending by income quartile.dta")

*-------------------------------------------------------------------------------
**# 4. National income quartile X day data, demeaned on day-of-week FEs 
*-------------------------------------------------------------------------------
project, uses("${root}/data/web/data/Affinity Daily Total Spending - National.csv")
import delimited "${root}/data/web/data/Affinity Daily Total Spending - National.csv", clear 
	
gen date = mdy(month, day, year)
format %td date
rename daily_spend_19_all daily_spend_19_q5 
keep daily_spend_19_q1 daily_spend_19_q2 daily_spend_19_q3 daily_spend_19_q4 daily_spend_19_q5 date 
	
gisid date 
keep if inrange(date, mdy(1, 1, 2019), mdy(4, 18, 2021))                        // range of relevant dates
	
greshape long daily_spend_19_q, i(date) j(income_q)
rename daily_spend_19_q snorm

gen dow = dow(date)
gen day = day(date)
sort income_q date 
	
* Residualize normed spending with respect to day-of-week FEs calculated over 2019
gegen temp = mean(snorm) if year(date) == 2019, by(dow income_q)
gegen mean_snorm_2019_dow = mean(temp), by(dow income_q)
assert temp == mean_snorm_2019_dow if year(date) == 2019
drop temp 

* Add back the income-quartile specific mean of normed spending in 2019 so we regain level differences
gegen temp = mean(snorm) if year(date) == 2019, by(income_q)
gegen mean_snorm_2019 = mean(temp), by(income_q)
assert temp == mean_snorm_2019 if year(date) == 2019
gen dow_resid_2019 = snorm - mean_snorm_2019_dow +  mean_snorm_2019
drop temp mean_snorm_2019_dow mean_snorm_2019

* Replace easter sunday with adjacent day values
gisid income_q date
sort income_q date
tsset income_q date
replace dow_resid_2019 = (L.dow_resid_2019 + F.dow_resid_2019) / 2 if date == mdy(4, 21, 2019)
replace dow_resid_2019 = (L.dow_resid_2019 + F.dow_resid_2019) / 2 if date == mdy(4, 12, 2020)
replace dow_resid_2019 = (L.dow_resid_2019 + F.dow_resid_2019) / 2 if date == mdy(4, 4, 2021) 
	
save "${root}/data/derived/Policy/Affinity - National - Daily - demeaned.dta", replace
project, creates("${root}/data/derived/Policy/Affinity - National - Daily - demeaned.dta")


*-------------------------------------------------------------------------------
**# 5. Create dataset for diff-in-diff 
*-------------------------------------------------------------------------------

*-------------------------------------------------------------------------------
**## 5.1. Calculate spending levels per stimulus recipients by income quartile and stimulus round 
*-------------------------------------------------------------------------------

* Load data on eligibility rates by round and ZCTA 
project, uses("${root}/data/derived/Policy/stimulus eligibility by ZCTA.dta")
use "${root}/data/derived/Policy/stimulus eligibility by ZCTA.dta", clear 
gisid round zcta                                                       

* Merge on ZCTA populations 
project, uses("${root}/data/derived/ACS 2014-2018 5-Year ZCTA/ACS 2014-2018 ZCTA.dta")
merge m:1 zcta using "${root}/data/derived/ACS 2014-2018 5-Year ZCTA/ACS 2014-2018 ZCTA.dta", assert(3) keepusing(medhhinc_2014_2018_est pop_2014_2018_est) nogen 
rename (medhhinc_2014_2018_est pop_2014_2018_est)(med_inc_2018 pop_2018)

* Create ZCTA income quartiles
xtile income_q = med_inc_2018 [w = pop_2018], nquantiles(4)

* Calculate total people eligible by round and income quartile 
gen people_eligible = eligibility_rate * pop_2018
gcollapse (sum) people_eligible, by(round income_q) 
drop if mi(income_q) 

* Calculate total stimulus recipients by round and income quartile 
project, uses("${root}/data/derived/Policy/share of EIP that has been distributed.dta")
merge m:1 round using "${root}/data/derived/Policy/share of EIP that has been distributed.dta", assert(3) keepusing(share_distributed) nogen 
gen recipients = people_eligible * share_distributed

* For each stimulus round, get spending per recipient in each income quartile (where we define spending as baseline spending in Jan 2019)
project, uses("${root}/data/derived/Policy/daily spending by income quartile.dta")
merge m:1 income_q using "${root}/data/derived/Policy/daily spending by income quartile.dta", assert(3) keepusing(y19_spending_snorm) nogen
gen spend_pp = y19_spending_snorm / recipients 


gisid round income_q 
tempfile spending_per_recipient 
save `spending_per_recipient'

*-------------------------------------------------------------------------------
**## 5.2. Create diff-in-diff variables 
*-------------------------------------------------------------------------------

* Load data on changes in spending, residualized on DoW FEs, for each income quartile 
project, uses("${root}/data/derived/Policy/Affinity - National - Daily - demeaned.dta")
use income_q date dow_resid_2019 using "${root}/data/derived/Policy/Affinity - National - Daily - demeaned.dta", clear
	
* Confirm structure 
gisid income_q date 
sort income_q date 

* Time variables 
gen year = year(date) 
gen month = month(date)
gen day = day(date)

* Duplicate dataset for each stimulus round 
tempfile spending_changes 
save `spending_changes'

gen round = "april"
append using `spending_changes'
replace round = "january" if missing(round)
append using `spending_changes'
replace round = "march" if missing(round)
gisid round income_q date 
sort round income_q date

* We no longer use the pooled income category 
drop if income_q == 5

* Merge in spending levels per recipient 
merge m:1 round income_q using `spending_per_recipient', assert(3) keepusing(spend_pp) nogen

*------------------------------------------------------------------------------*
* Define the stimulus date for each stimulus round 
gen stim_date = mdy(4, 15, 2020) if round == "april"
replace stim_date = mdy(1, 4, 2021) if round == "january"
replace stim_date = mdy(3, 17, 2021) if round == "march"

* Define the stimulus date corresponding to the control year for each stimulus round 
gen stim_date_control = mdy(4, 15, 2019) if round == "april"
replace stim_date_control = mdy(1, 4, 2020) if round == "january"
replace stim_date_control = mdy(3, 17, 2019) if round == "march"

format %td stim_date stim_date_control

** For each stimulus round, define the set of dates in the control and treatment periods 
* For April 2020, we exclude April 14 2020 since it is a partially-treated date (and correspondingly, April 14 2019) 
* We use a pre-period of 25 days (excluding April 14) and a post-period of 25 days (counting the stimulus date itself)
gen analysis_window = inrange(date, stim_date - 25, stim_date + 24) | inrange(date, stim_date_control - 25, stim_date_control + 24) if round == "april"
replace analysis_window = 0 if inlist(date, mdy(4, 14, 2020), mdy(4, 14, 2019)) & round == "april"

* For Jan 2021, we use a pre-period of 11 days (to avoid the holiday period) and 
* a post-period of 16 days (counting the stimulus date itself) to reflect the data we had when we first published our analysis for Stim 2
replace analysis_window = inrange(date, mdy(12, 4, 2020), mdy(1, 19, 2021)) | inrange(date, mdy(12, 4, 2019), mdy(1, 19, 2020)) if round == "january"
replace analysis_window = 0 if (inrange(date, mdy(12, 15, 2020), mdy(1, 3, 2021)) | inrange(date, mdy(12, 15, 2019), mdy(1, 3, 2020))) & round == "january"

* For March 2021, we exclude March 13-16 2021 as they are partially-treated dates (and correspondingly, March 13-16 2019)
* We use a pre-period of 25 days (excluding March 13-16 2021) and a post-period of 25 days (counting the stimulus date itself)
replace analysis_window = inrange(date, stim_date - 25, stim_date + 24) | inrange(date, stim_date_control - 25, stim_date_control + 24) if round == "march"
replace analysis_window = 0 if (inrange(date, mdy(3, 13, 2021), mdy(3, 16, 2021)) | inrange(date, mdy(3, 13, 2019), mdy(3, 16, 2019))) & round == "march"

* Create treatment indicator (post-Covid)
gen treat = (date > mdy(3, 1, 2020))

* Create post indicator (post-stimulus or post-stimulus control, within the respective years)
gen post = 1 if (date >= stim_date & year == 2020) & round == "april" & analysis_window == 1
replace post = 1 if (date >= stim_date_control & year == 2019) & round == "april" & analysis_window == 1
replace post = 1 if (date >= stim_date & month == 1 & year == 2021) & round == "january" & analysis_window == 1 
replace post = 1 if (date >= stim_date_control & month == 1 & year == 2020) & round == "january" & analysis_window == 1 
replace post = 1 if (date >= stim_date & year == 2021) & round == "march" & analysis_window == 1
replace post = 1 if (date >= stim_date_control & year == 2019) & round == "march" & analysis_window == 1
replace post = 0 if missing(post) & analysis_window == 1 
assert missing(post) if analysis_window == 0

* Make sure we have the expected number of dates in each analysis window 
gunique date if post == 0 & year == 2020 & round == "april"
assert r(unique) == 24 
gunique date if post == 1 & year == 2020 & round == "april"
assert r(unique) == 25 
gunique date if post == 0 & year == 2019 & round == "april"
assert r(unique) == 24 
gunique date if post == 1 & year == 2019 & round == "april"
assert r(unique) == 25 

gunique date if post == 0 & year == 2020 & round == "january"
assert r(unique) == 11 
gunique date if post == 1 & year == 2021 & round == "january"
assert r(unique) == 16 
gunique date if post == 0 & year == 2019 & round == "january"
assert r(unique) == 11 
gunique date if post == 1 & year == 2020 & round == "january"
assert r(unique) == 16 

gunique date if post == 0 & year == 2021 & round == "march"
assert r(unique) == 21 
gunique date if post == 1 & year == 2021 & round == "march"
assert r(unique) == 25 
gunique date if post == 0 & year == 2019 & round == "march"
assert r(unique) == 21 
gunique date if post == 1 & year == 2019 & round == "march"
assert r(unique) == 25 

gunique date if analysis_window == 1 & round == "april"
assert r(unique) == (24 + 25) * 2
gunique date if analysis_window == 1 & round == "january"
assert r(unique) == (11 + 16) * 2
gunique date if analysis_window == 1 & round == "march"
assert r(unique) == (21 + 25) * 2

*------------------------------------------------------------------------------*
* Create indicator for being within the first five days of the post-period 
gen first_five = (inrange(date, stim_date, stim_date + 4) | inrange(date, stim_date_control, stim_date_control + 4)) if analysis_window == 1

* Create indicator for being after the first five days of the post-period 
gen after_first_five = (first_five == 0 & post == 1) if analysis_window == 1
assert first_five + after_first_five == post 

* Residualize with respect to a linear pre-trend for each round for April and March
* (estimated only within the analysis window, allowing for different pre-trends for the pre-Covid and post-Covid periods)
gen dow_resid_2019_notrend = .
gen dow_resid_2019_pretrend = .

foreach round in april march {
	forval treat = 0/1 {
		reg dow_resid_2019 date if post == 0 & round == "`round'" & treat == `treat'
		
		if "`round'" == "april" assert e(N) == 24 * 4
		else if "`round'" == "march" assert e(N) == 21 * 4
	
		predict detrended, resid 
		replace dow_resid_2019_notrend = detrended if round == "`round'" & treat == `treat'
		drop detrended
		
		predict linear_time_trend 
		replace dow_resid_2019_pretrend = linear_time_trend if round == "`round'" & treat == `treat'
		drop linear_time_trend
	}
}

assert !missing(dow_resid_2019_notrend) if inlist(round, "april", "march")
assert missing(dow_resid_2019_notrend) if round == "january"
replace dow_resid_2019_notrend = dow_resid_2019 if round == "january"

*------------------------------------------------------------------------------*
** Some checks that we're detrending correctly 

** Check that original value = detrended value + predicted trend, up to rounding error
assert inrange(dow_resid_2019, dow_resid_2019_notrend + dow_resid_2019_pretrend - 2E-7, dow_resid_2019_notrend + dow_resid_2019_pretrend + 2E-7) if inlist(round, "april", "march")
assert dow_resid_2019 == dow_resid_2019_notrend if round == "january"

** Check that first difference in detrended value = detrended first differences, up to rounding error 

* First difference: post-Covid vs. pre-Covid 
foreach spec in "" "_notrend" {
	
	* Normed spending residualized on DoW FEs in the control year
	gen temp = dow_resid_2019`spec' if treat == 0 & analysis_window == 1
	egen dow_resid_control`spec' = mean(temp) if analysis_window == 1, by(round income_q month day)
	assert temp == dow_resid_control`spec' if treat == 0 & analysis_window == 1
	drop temp 
	
	* First difference
	gen first_diff`spec' = dow_resid_2019`spec' - dow_resid_control`spec'
	assert first_diff`spec' == 0 if treat == 0 & analysis_window == 1
	assert !missing(first_diff`spec') if analysis_window == 1
	assert missing(first_diff`spec') if analysis_window == 0
}

* Detrended first difference 
gen notrend_first_diff = .

foreach round in april march {
	reg first_diff date if post == 0 & round == "`round'" & treat == 1
		
	* 24 days pre- for April; 21 days pre- for March
	if "`round'" == "april" assert e(N) == 24 * 4
	else if "`round'" == "march" assert e(N) == 21 * 4 
		
	predict detrended, resid 
	replace notrend_first_diff = detrended if round == "`round'" & treat == 1
	drop detrended 
}

* Assert equality of first difference in detrended value and detrended first differences
assert inrange(first_diff_notrend, notrend_first_diff - 2E-7, notrend_first_diff + 2E-7) if analysis_window == 1 & treat == 1

* Assert that we cannot reject equality in a common pre-trend across income quartiles in first differences within each stimulus round 
foreach round in april march {
	reg first_diff i.income_q##c.date if post == 0 & round == "`round'" & treat == 1, r
	
	if "`round'" == "april" assert e(N) == 24 * 4
	else if "`round'" == "march" assert e(N) == 21 * 4
	
	test 2.income_q#c.date = 3.income_q#c.date = 4.income_q#c.date
	assert r(p) > 0.8
}

** Drop diagnostic variables 
drop dow_resid_2019_pretrend dow_resid_control first_diff dow_resid_control_notrend first_diff_notrend notrend_first_diff

* Export dataset
gisid round income_q date
save "${root}/data/derived/Policy/stimulus_did_dataset.dta", replace 
project, creates("${root}/data/derived/Policy/stimulus_did_dataset.dta") 
